﻿import tkinter as tk

# Text analysed by default. words() replaces it with the text box contents
# whenever the box holds something other than whitespace.
doc = "道可道非常道名可名非常名"
def words():
    """Count every contiguous substring of the current text and print the tally.

    The text is read from the ``text_area`` widget. If the widget is empty or
    holds only whitespace, the module-level ``doc`` is analysed instead;
    otherwise ``doc`` is replaced by the stripped widget contents, so the most
    recent non-empty input remains the fallback for later calls.

    Prints a list of ``(substring, count)`` pairs sorted by descending count.
    Substrings with equal counts keep the order in which they were first
    encountered (by start position, then by increasing length).

    Note: a text of length n has n * (n + 1) / 2 substrings, so the work grows
    quadratically with the input length.
    """
    global doc
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    entered = text_area.get("1.0", tk.END).strip()
    if entered:
        doc = entered

    text = doc
    length = len(text)

    # Stream the substrings straight into the counter instead of first
    # materialising them all in an intermediate list.
    counts = Counter(
        text[start:stop]
        for start in range(length)
        for stop in range(start + 1, length + 1)
    )

    # most_common() sorts by count, descending, and is stable for ties.
    print(counts.most_common())

# Build the main application window.
root = tk.Tk()
root.title("统计分词")

# Multi-line text box; words() reads the text to analyse from it.
text_area = tk.Text(master=root, width=50, height=10)
text_area.pack(pady=10)

# Button that triggers the substring count when clicked.
button = tk.Button(master=root, text="开始分词", command=words)
button.pack()

# Hand control over to the Tk event loop.
root.mainloop()